Some cudadrv tests #2684
Conversation
Your PR requires formatting changes to meet the project's style guidelines. The suggested changes:

diff --git a/src/texture.jl b/src/texture.jl
index fc1590423..bbdca57db 100644
--- a/src/texture.jl
+++ b/src/texture.jl
@@ -97,7 +97,7 @@ end
# idempotency
CuTextureArray{T,N}(xs::CuTextureArray{T,N}) where {T,N} = xs
-CuTextureArray(xs::CuTextureArray{T,N}) where {T,N} = xs
+CuTextureArray(xs::CuTextureArray{T, N}) where {T, N} = xs
CuTextureArray(A::AbstractArray{T,N}) where {T,N} = CuTextureArray{T,N}(A)
diff --git a/test/core/cudadrv.jl b/test/core/cudadrv.jl
index 2372cb568..7772cf9cb 100644
--- a/test/core/cudadrv.jl
+++ b/test/core/cudadrv.jl
@@ -435,15 +435,15 @@ nb = sizeof(data)
typed_pointer(buf::Union{CUDA.DeviceMemory, CUDA.UnifiedMemory}, T) = convert(CuPtr{T}, buf)
typed_pointer(buf::CUDA.HostMemory, T) = convert(Ptr{T}, buf)
-@testset "showing" begin
- for (Ty, str) in zip([CUDA.DeviceMemory, CUDA.HostMemory, CUDA.UnifiedMemory], ("DeviceMemory", "HostMemory", "UnifiedMemory"))
- dummy = CUDA.alloc(Ty, 0)
- @test startswith(sprint(show, dummy), str)
- CUDA.free(dummy)
+ @testset "showing" begin
+ for (Ty, str) in zip([CUDA.DeviceMemory, CUDA.HostMemory, CUDA.UnifiedMemory], ("DeviceMemory", "HostMemory", "UnifiedMemory"))
+ dummy = CUDA.alloc(Ty, 0)
+ @test startswith(sprint(show, dummy), str)
+ CUDA.free(dummy)
+ end
end
-end
-@testset "allocations and copies, src $srcTy dst $dstTy" for srcTy in [CUDA.DeviceMemory, CUDA.HostMemory, CUDA.UnifiedMemory],
+ @testset "allocations and copies, src $srcTy dst $dstTy" for srcTy in [CUDA.DeviceMemory, CUDA.HostMemory, CUDA.UnifiedMemory],
dstTy in [CUDA.DeviceMemory, CUDA.HostMemory, CUDA.UnifiedMemory]
dummy = CUDA.alloc(srcTy, 0)
@@ -479,7 +479,7 @@ end
# test device with context in which pointer was allocated.
@test device(typed_pointer(src, T)) == device()
- @test context(typed_pointer(src, T)) == context()
+ @test context(typed_pointer(src, T)) == context()
if !memory_pools_supported(device())
# NVIDIA bug #3319609
@test context(typed_pointer(src, T)) == context()
@@ -503,7 +503,7 @@ end
CUDA.free(dst)
end
-@testset "pointer attributes" begin
+ @testset "pointer attributes" begin
src = CUDA.alloc(CUDA.DeviceMemory, nb)
attribute!(typed_pointer(src, T), CUDA.POINTER_ATTRIBUTE_SYNC_MEMOPS, 0)
@@ -511,7 +511,7 @@ end
CUDA.free(src)
end
-@testset "asynchronous operations" begin
+ @testset "asynchronous operations" begin
src = CUDA.alloc(CUDA.DeviceMemory, nb)
unsafe_copyto!(typed_pointer(src, T), pointer(data), N; async=true)
@@ -521,7 +521,7 @@ end
CUDA.free(src)
end
-@testset "pinned memory" begin
+ @testset "pinned memory" begin
# create a pinned and mapped buffer
src = CUDA.alloc(CUDA.HostMemory, nb, CUDA.MEMHOSTALLOC_DEVICEMAP)
@@ -553,16 +553,16 @@ if attribute(device(), CUDA.DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED) != 0
CUDA.unregister(src)
- # with a RefValue
- src = Ref{T}(T(42))
- CUDA.pin(src)
- cpu_ptr = Base.unsafe_convert(Ptr{T}, src)
- ref = Array{T}(undef, 1)
- unsafe_copyto!(pointer(ref), cpu_ptr, 1)
- @test ref == [T(42)]
+ # with a RefValue
+ src = Ref{T}(T(42))
+ CUDA.pin(src)
+ cpu_ptr = Base.unsafe_convert(Ptr{T}, src)
+ ref = Array{T}(undef, 1)
+ unsafe_copyto!(pointer(ref), cpu_ptr, 1)
+ @test ref == [T(42)]
end
-@testset "unified memory" begin
+ @testset "unified memory" begin
src = CUDA.alloc(CUDA.UnifiedMemory, nb)
@test_throws BoundsError CUDA.prefetch(src, 2*nb; device=CUDA.DEVICE_CPU)
@@ -583,7 +583,7 @@ end
CUDA.free(src)
end
-@testset "3d memcpy" begin
+ @testset "3d memcpy" begin
# TODO: use cuMemAllocPitch (and put pitch in buffer?) to actually get benefit from this
 data = collect(reshape(1:27, 3, 3, 3))
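For reference, the testsets above revolve around the `typed_pointer` helper defined in the diff, which dispatches on the buffer type: device and unified buffers convert to a `CuPtr{T}`, while host buffers convert to a plain `Ptr{T}`. Below is a minimal sketch of the allocate/copy/free round trip these testsets exercise; the element type and host data are placeholders, not values from the test suite.

```julia
using CUDA

# Helper from the diff: device/unified memory yields a device pointer,
# host memory yields a plain CPU pointer.
typed_pointer(buf::Union{CUDA.DeviceMemory, CUDA.UnifiedMemory}, T) = convert(CuPtr{T}, buf)
typed_pointer(buf::CUDA.HostMemory, T) = convert(Ptr{T}, buf)

T = Int32                            # placeholder element type
data = T[1, 2, 3, 4]                 # placeholder host data
nb = sizeof(data)

src = CUDA.alloc(CUDA.DeviceMemory, nb)           # raw device allocation
unsafe_copyto!(typed_pointer(src, T), pointer(data), length(data))

ref = Array{T}(undef, length(data))               # copy back to the host
unsafe_copyto!(pointer(ref), typed_pointer(src, T), length(data))
@assert ref == data

CUDA.free(src)                       # raw allocations are freed manually
```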
Codecov Report
All modified and coverable lines are covered by tests ✅

@@            Coverage Diff             @@
##           master    #2684      +/-   ##
==========================================
+ Coverage   82.57%   82.86%    +0.28%
==========================================
  Files         153      153
  Lines       13606    13606
==========================================
+ Hits        11235    11274       +39
+ Misses       2371     2332       -39

☔ View full report in Codecov by Sentry.
CUDA.jl Benchmarks
Benchmark suite | Current: dbb2215 | Previous: 6bf72dd | Ratio |
---|---|---|---|
latency/precompile | 46183641505 ns | 46450283323 ns | 0.99 |
latency/ttfp | 7001382825 ns | 7028014177 ns | 1.00 |
latency/import | 3651822156 ns | 3667348189 ns | 1.00 |
integration/volumerhs | 9616637.5 ns | 9625836 ns | 1.00 |
integration/byval/slices=1 | 146680.5 ns | 146875 ns | 1.00 |
integration/byval/slices=3 | 425045 ns | 424860 ns | 1.00 |
integration/byval/reference | 144900 ns | 144959 ns | 1.00 |
integration/byval/slices=2 | 285974 ns | 285961 ns | 1.00 |
integration/cudadevrt | 103233 ns | 103332 ns | 1.00 |
kernel/indexing | 13949 ns | 14061 ns | 0.99 |
kernel/indexing_checked | 14499 ns | 14775 ns | 0.98 |
kernel/occupancy | 637.1046511627907 ns | 656.859649122807 ns | 0.97 |
kernel/launch | 2004.2 ns | 2090.2 ns | 0.96 |
kernel/rand | 14598 ns | 16643 ns | 0.88 |
array/reverse/1d | 19363 ns | 19602 ns | 0.99 |
array/reverse/2d | 24535 ns | 24553 ns | 1.00 |
array/reverse/1d_inplace | 10719 ns | 11159 ns | 0.96 |
array/reverse/2d_inplace | 12480 ns | 13010 ns | 0.96 |
array/copy | 21175 ns | 20672 ns | 1.02 |
array/iteration/findall/int | 157906 ns | 157732 ns | 1.00 |
array/iteration/findall/bool | 138756 ns | 138853.5 ns | 1.00 |
array/iteration/findfirst/int | 152794 ns | 153651.5 ns | 0.99 |
array/iteration/findfirst/bool | 154428 ns | 153926 ns | 1.00 |
array/iteration/scalar | 70496 ns | 72447 ns | 0.97 |
array/iteration/logical | 213423 ns | 206640.5 ns | 1.03 |
array/iteration/findmin/1d | 41018 ns | 40619 ns | 1.01 |
array/iteration/findmin/2d | 93431 ns | 93219 ns | 1.00 |
array/reductions/reduce/1d | 35842 ns | 34826 ns | 1.03 |
array/reductions/reduce/2d | 40715 ns | 50621 ns | 0.80 |
array/reductions/mapreduce/1d | 32909 ns | 32476 ns | 1.01 |
array/reductions/mapreduce/2d | 41241 ns | 50769 ns | 0.81 |
array/broadcast | 20512 ns | 20427 ns | 1.00 |
array/copyto!/gpu_to_gpu | 13698 ns | 11886 ns | 1.15 |
array/copyto!/cpu_to_gpu | 208479.5 ns | 207751.5 ns | 1.00 |
array/copyto!/gpu_to_cpu | 243344 ns | 245794 ns | 0.99 |
array/accumulate/1d | 108244 ns | 109015 ns | 0.99 |
array/accumulate/2d | 80382 ns | 79626 ns | 1.01 |
array/construct | 1302.7 ns | 1306 ns | 1.00 |
array/random/randn/Float32 | 43238 ns | 43298.5 ns | 1.00 |
array/random/randn!/Float32 | 26422 ns | 26052 ns | 1.01 |
array/random/rand!/Int64 | 26958 ns | 26998 ns | 1.00 |
array/random/rand!/Float32 | 8688.5 ns | 8602.333333333334 ns | 1.01 |
array/random/rand/Int64 | 29725 ns | 29780 ns | 1.00 |
array/random/rand/Float32 | 13025 ns | 12942 ns | 1.01 |
array/permutedims/4d | 61583 ns | 60894 ns | 1.01 |
array/permutedims/2d | 55519.5 ns | 55115 ns | 1.01 |
array/permutedims/3d | 55841.5 ns | 55898 ns | 1.00 |
array/sorting/1d | 2775794 ns | 2776458 ns | 1.00 |
array/sorting/by | 3367253 ns | 3369147.5 ns | 1.00 |
array/sorting/2d | 1084250 ns | 1084406 ns | 1.00 |
cuda/synchronization/stream/auto | 1004 ns | 1025.7 ns | 0.98 |
cuda/synchronization/stream/nonblocking | 6261.8 ns | 6461.2 ns | 0.97 |
cuda/synchronization/stream/blocking | 776.5849056603773 ns | 789.3663366336634 ns | 0.98 |
cuda/synchronization/context/auto | 1160.1 ns | 1164 ns | 1.00 |
cuda/synchronization/context/nonblocking | 6588 ns | 6604.6 ns | 1.00 |
cuda/synchronization/context/blocking | 903.7234042553191 ns | 889.4285714285714 ns | 1.02 |
This comment was automatically generated by workflow using github-action-benchmark.
Also added some more `@testset` blocks instead of `let` blocks.
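For illustration, here is that conversion in minimal form (the test body below is hypothetical, not taken from the diff): a `let` block only introduces a local scope, whereas a named `@testset` additionally groups its `@test`s under their own entry in the test summary.

```julia
using Test

# Before: a `let` block scopes locals, but failures are reported
# under the enclosing testset (or the top level).
let
    x = 1 + 1
    @test x == 2
end

# After: a named `@testset` gives the same tests their own entry
# and pass/fail counts in the summary.
@testset "basic arithmetic" begin
    x = 1 + 1
    @test x == 2
end
```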